import requests
import re
try:
    # Python2
    from urlparse import urlparse
    from urlparse import urljoin
except ImportError:
    # Python3
    from urllib.parse import urljoin
    from urllib.parse import urlparse

# Default HTTP request headers: a desktop Chrome (macOS) User-Agent string.
# testUrl passes this dict to requests.get.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/61.0.3163.100 Safari/537.36"
    ),
}

# Check whether the URL is reachable


def testUrl(url):
    """Issue a GET request to *url* and report the resulting status code.

    The request is sent with the module-level ``headers`` and ``timeout=5``.

    Returns:
        The response's ``status_code`` on success. If anything raises while
        making the request, the exception is printed and the sentinel value
        1000 is returned instead.
    """
    try:
        response = requests.get(url, headers=headers, timeout=5)
        result = response.status_code
    except Exception as err:
        # Best-effort probe: report the failure and fall back to the sentinel.
        print(err)
        result = 1000
    return result
# Extract the domain name from the URL


def getdomains(url):
    """Return the network-location (``netloc``) part of *url* as a string.

    The value is whatever ``urlparse`` reports as ``netloc``, so it keeps any
    userinfo or port present in the URL; it is an empty string when the URL
    has no ``//`` authority section.
    """
    parsed = urlparse(url)
    return str(parsed.netloc)

# Use a regex to check whether the URL matches the expected format


def match(url):
    """Check *url* against the http/https URL pattern.

    *url* is converted with ``str()`` first, so non-string values are
    accepted. The pattern requires ``http://`` or ``https://`` followed by a
    word character and at least one more character.

    Returns:
        The ``re`` match object when the pattern matches, otherwise ``None``.
    """
    candidate = str(url)
    return re.match(r'^https?:/{2}\w.+$', candidate)
